/*
 * Copyright (c) 2000-2005 by Hewlett-Packard Company.  All rights reserved.
 * Copyright (c) 2008-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */

#include "private/gc_priv.h"

#if defined(THREAD_LOCAL_ALLOC)

#  if !defined(THREADS) && !defined(CPPCHECK)
#    error Invalid config - THREAD_LOCAL_ALLOC requires GC_THREADS
#  endif

#  include "private/thread_local_alloc.h"

/*
 * The key mapping each registered thread to its thread-local free-list
 * structure (`GC_tlfs`); set per-thread by `GC_init_thread_local()`.
 * With compiler-supported TLS the key itself lives in thread-local
 * storage for a faster lookup path.
 */
#  if defined(USE_COMPILER_TLS)
__thread GC_ATTR_TLS_FAST
#  elif defined(USE_WIN32_COMPILER_TLS)
__declspec(thread) GC_ATTR_TLS_FAST
#  endif
    GC_key_t GC_thread_key;

/* Guards the one-time creation of `GC_thread_key` in `GC_init_thread_local()`. */
static GC_bool keys_initialized;

#  ifndef GC_NO_DEINIT
/*
 * Undo the one-time key setup so that a subsequent
 * `GC_init_thread_local()` call re-creates `GC_thread_key`.
 */
GC_INNER void
GC_reset_thread_local_initialization(void)
{
  keys_initialized = FALSE;
  /* TODO: Dispose resources associated with `GC_thread_key`. */
}
#  endif

/*
 * Return a single nonempty free list `fl` to the global one pointed to
 * by `gfl`.  Both lists must hold objects of the same size.
 */
static void
return_single_freelist(void *fl, void **gfl)
{
  void *global_head = *gfl;
  void **last_link;

  if (NULL == global_head) {
    /* The global list is empty, so `fl` simply becomes it. */
    *gfl = fl;
    return;
  }

  GC_ASSERT(GC_size(fl) == GC_size(global_head));
  /* Find the terminating link of `fl`, then append the global list. */
  last_link = &obj_link(fl);
  while (ADDR(*last_link) >= HBLKSIZE) {
    last_link = &obj_link(*last_link);
  }
  GC_ASSERT(NULL == *last_link);
  *last_link = global_head;
  *gfl = fl;
}

/*
 * Recover the contents of the free-list array `fl` into the global one
 * `gfl`.
 */
static void
return_freelists(void **fl, void **gfl)
{
  int i;

  /* Granule sizes 1 .. GC_TINY_FREELISTS-1 map directly onto `gfl`. */
  for (i = 1; i < GC_TINY_FREELISTS; ++i) {
    void *head = fl[i];

    if (ADDR(head) >= HBLKSIZE)
      return_single_freelist(head, &gfl[i]);
    /*
     * Clear the slot, since the thread structure may hang around.
     * Store a value that is likely to trap if it is ever dereferenced.
     */
    fl[i] = (ptr_t)NUMERIC_TO_VPTR(HBLKSIZE);
  }
  /* The 0 granule free list really contains 1 granule objects. */
  if (ADDR(fl[0]) >= HBLKSIZE
#  ifdef GC_GCJ_SUPPORT
      && ADDR(fl[0]) != ERROR_FL
#  endif
  ) {
    return_single_freelist(fl[0], &gfl[1]);
  }
}

#  ifdef USE_PTHREAD_SPECIFIC
/*
 * Re-set the TLS value on thread cleanup to allow thread-local allocations
 * to happen in the TLS destructors.  `GC_unregister_my_thread()` (and
 * similar routines) will finally set the `GC_thread_key` to `NULL`
 * preventing this destructor from being called repeatedly.
 */
static void
reset_thread_key(void *v)
{
  pthread_setspecific(GC_thread_key, v);
}
#  else
/* No destructor is needed; a null one is passed to `GC_key_create()`. */
#    define reset_thread_key 0
#  endif

/*
 * Set up the thread-local free lists `p` for the calling thread:
 * create `GC_thread_key` on first use, bind `p` to it and mark every
 * free-list slot as empty.  The allocator lock must be held.
 */
GC_INNER void
GC_init_thread_local(GC_tlfs p)
{
  int i, k, res;

  GC_ASSERT(I_HOLD_LOCK());
  if (UNLIKELY(!keys_initialized)) {
#  ifdef USE_CUSTOM_SPECIFIC
    /* Ensure proper alignment of a "pushed" GC symbol. */
    GC_ASSERT(ADDR(&GC_thread_key) % ALIGNMENT == 0);
#  endif
    res = GC_key_create(&GC_thread_key, reset_thread_key);
    if (COVERT_DATAFLOW(res) != 0) {
      ABORT("Failed to create key for local allocator");
    }
    keys_initialized = TRUE;
  }
  res = GC_setspecific(GC_thread_key, p);
  if (COVERT_DATAFLOW(res) != 0) {
    ABORT("Failed to set thread specific allocation pointers");
  }
  /* Initialize every free-list slot to the "empty" sentinel value. */
  for (k = 0; k < THREAD_FREELISTS_KINDS; ++k) {
    for (i = 0; i < GC_TINY_FREELISTS; ++i) {
      p->_freelists[k][i] = NUMERIC_TO_VPTR(1);
    }
  }
#  ifdef GC_GCJ_SUPPORT
  for (i = 0; i < GC_TINY_FREELISTS; ++i) {
    p->gcj_freelists[i] = NUMERIC_TO_VPTR(1);
  }
  /*
   * The zero-sized free list is handled like the regular free list, to
   * ensure that the explicit deallocation works.  However, an allocation
   * of a `gcj` object with the zero size is always an error.
   */
  p->gcj_freelists[0] = MAKE_CPTR(ERROR_FL);
#  endif
}

/*
 * Return the contents of the thread-local free lists `p` to the
 * corresponding global free lists.  The allocator lock must be held;
 * the caller must be the thread owning `p`.
 */
GC_INNER void
GC_destroy_thread_local(GC_tlfs p)
{
  int k;

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(GC_getspecific(GC_thread_key) == p);
  /* We currently only do this from the thread itself. */
  GC_STATIC_ASSERT(THREAD_FREELISTS_KINDS <= MAXOBJKINDS);
  /* Stop at `GC_n_kinds` since the remaining kinds are not created. */
  for (k = 0; k < THREAD_FREELISTS_KINDS && k != (int)GC_n_kinds; ++k) {
    return_freelists(p->_freelists[k], GC_obj_kinds[k].ok_freelist);
  }
#  ifdef GC_GCJ_SUPPORT
  return_freelists(p->gcj_freelists, (void **)GC_gcjobjfreelist);
#  endif
}

/*
 * Return the calling thread's free-list structure, or `NULL` if
 * thread-local allocation is not yet set up for this thread.
 */
STATIC void *
GC_get_tlfs(void)
{
#  if !defined(USE_PTHREAD_SPECIFIC) && !defined(USE_WIN32_SPECIFIC)
  GC_key_t key = GC_thread_key;

  /*
   * A zero key means `GC_init_parallel()` has not yet run.  That also
   * means we are not locking, so `GC_malloc_kind_global()` is fairly
   * cheap for the caller to fall back to.
   */
  return UNLIKELY(0 == key) ? NULL : GC_getspecific(key);
#  else
  return UNLIKELY(!keys_initialized) ? NULL : GC_getspecific(GC_thread_key);
#  endif
}

/*
 * Allocate an object of `lb` bytes of the given `kind`, preferring the
 * calling thread's local free lists; fall back to
 * `GC_malloc_kind_global()` whenever the thread-local fast path is not
 * available.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_malloc_kind(size_t lb, int kind)
{
  size_t lg;
  void *tsd;
  void *result;

#  if MAXOBJKINDS > THREAD_FREELISTS_KINDS
  /* Kinds without a thread-local free list go to the global allocator. */
  if (UNLIKELY(kind >= THREAD_FREELISTS_KINDS))
    return GC_malloc_kind_global(lb, kind);
#  endif
  /* `NULL` means thread-local allocation is not set up for this thread. */
  tsd = GC_get_tlfs();
  if (UNLIKELY(NULL == tsd))
    return GC_malloc_kind_global(lb, kind);

  GC_ASSERT(GC_is_initialized);
  GC_ASSERT(GC_is_thread_tsd_valid(tsd));
  /* Convert the request size (bytes) to a granule count. */
  lg = ALLOC_REQUEST_GRANS(lb);
  /* Presumably a non-null value placates a cppcheck false positive - TODO confirm. */
#  if defined(CPPCHECK)
#    define MALLOC_KIND_PTRFREE_INIT (void *)1
#  else
#    define MALLOC_KIND_PTRFREE_INIT NULL
#  endif
  /*
   * Pop from the per-thread free list if possible; otherwise evaluate
   * the global-allocation fallback expression.  The last argument clears
   * the free-list link word for pointer-containing kinds (a `PTRFREE`
   * object needs no such clearing).
   */
  GC_FAST_MALLOC_GRANS(result, lg, ((GC_tlfs)tsd)->_freelists[kind],
                       DIRECT_GRANULES, kind, GC_malloc_kind_global(lb, kind),
                       (void)(kind == PTRFREE ? MALLOC_KIND_PTRFREE_INIT
                                              : (obj_link(result) = 0)));
#  ifdef LOG_ALLOCS
  GC_log_printf("GC_malloc_kind(%lu, %d) returned %p, recent GC #%lu\n",
                (unsigned long)lb, kind, result, (unsigned long)GC_gc_no);
#  endif
  return result;
}

#  ifdef GC_GCJ_SUPPORT

#    include "gc/gc_gcj.h"

/*
 * Allocate a `gcj`-style object of `lb` bytes, storing `vtable_ptr`
 * into its first word, preferring the calling thread's local free
 * lists.  Unlike `GC_malloc_kind()`, this requires that the collector
 * is already initialized and the thread is registered.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_gcj_malloc(size_t lb, const void *vtable_ptr)
{
  void *result;
  void **tiny_fl;
  size_t lg;

  /*
   * Unlike the other thread-local allocation calls, we assume that the
   * collector has been explicitly initialized.
   */
  GC_ASSERT(GC_gcjobjfreelist != NULL);
#    if defined(USE_PTHREAD_SPECIFIC) || defined(USE_WIN32_SPECIFIC)
  GC_ASSERT(keys_initialized);
#    else
  GC_ASSERT(GC_thread_key != 0);
#    endif

  /*
   * `gcj`-style allocation without locks is extremely tricky.
   * The fundamental issue is that we may end up marking a free list,
   * which has free-list links instead of "vtable" pointers.
   * That is usually OK, since the next object on the free list will be
   * cleared, and will thus be interpreted as containing a zero descriptor.
   * That is fine if the object has not yet been initialized.  But there
   * are interesting potential races.  In the case of incremental
   * collection, this seems hopeless, since the marker may run
   * asynchronously, and may pick up the pointer to the next free-list
   * entry (which it thinks is a "vtable" pointer), get suspended for
   * a while, and then see an allocated object instead of the "vtable".
   * This may be avoidable with either a handshake with the collector or,
   * probably more easily, by moving the free list links to the second
   * "pointer-sized" word of each object.  The latter is not a universal
   * win, since on architecture like Itanium, nonzero offsets are not
   * necessarily free.  And there may be cache fill order issues.
   * For now, we punt with the incremental collection.  This probably means
   * that the incremental collection should be enabled before we create
   * a second thread.
   */
  if (UNLIKELY(GC_incremental))
    return GC_core_gcj_malloc(lb, vtable_ptr, 0 /* `flags` */);

  tiny_fl = ((GC_tlfs)GC_getspecific(GC_thread_key))->gcj_freelists;
  lg = ALLOC_REQUEST_GRANS(lb);

  /*
   * The provided `default_expr` below forces the initialization of the
   * "vtable" pointer.  This is necessary to ensure some very subtle
   * properties required if a garbage collection is run in the middle of
   * such an allocation.  Here we implicitly also assume atomicity for the
   * free list and method pointer assignments.  We must update the free list
   * before we store the pointer.  Otherwise a collection at this point
   * would see a corrupted free list.  A real memory barrier is not needed,
   * since the action of stopping this thread will cause prior writes
   * to complete.  We assert that any concurrent marker will stop us.
   * Thus it is impossible for a mark procedure to see the allocation of the
   * next object, but to see this object still containing a free-list pointer.
   * Otherwise the marker, by misinterpreting the free-list link as a "vtable"
   * pointer, might find a random "mark descriptor" in the next object.
   */
  GC_FAST_MALLOC_GRANS(
      result, lg, tiny_fl, DIRECT_GRANULES, GC_gcj_kind,
      GC_core_gcj_malloc(lb, vtable_ptr, 0 /* `flags` */), do {
        AO_compiler_barrier();
        *(const void **)result = vtable_ptr;
      } while (0));
  return result;
}

#  endif /* GC_GCJ_SUPPORT */

/*
 * Set the mark bits on every object contained in the thread-local free
 * lists of `p`, so that the collector does not reclaim them.
 */
GC_INNER void
GC_mark_thread_local_fls_for(GC_tlfs p)
{
  int k, i;

  for (i = 0; i < GC_TINY_FREELISTS; ++i) {
    for (k = 0; k < THREAD_FREELISTS_KINDS; ++k) {
      /*
       * Load the pointer atomically as it might be updated concurrently
       * by `GC_FAST_MALLOC_GRANS()`.
       */
      ptr_t head = GC_cptr_load((volatile ptr_t *)&p->_freelists[k][i]);

      if (ADDR(head) > HBLKSIZE)
        GC_set_fl_marks(head);
    }
#  ifdef GC_GCJ_SUPPORT
    /* Slot 0 of `gcj_freelists` holds `ERROR_FL`, so it is skipped. */
    if (LIKELY(i > 0)) {
      ptr_t head = GC_cptr_load((volatile ptr_t *)&p->gcj_freelists[i]);

      if (ADDR(head) > HBLKSIZE)
        GC_set_fl_marks(head);
    }
#  endif
  }
}

#  if defined(GC_ASSERTIONS)
/* Check that all thread-local free-lists in `p` are completely marked. */
void
GC_check_tls_for(GC_tlfs p)
{
  int k, i;

  for (k = 0; k < THREAD_FREELISTS_KINDS; ++k) {
    for (i = 1; i < GC_TINY_FREELISTS; ++i) {
      GC_check_fl_marks(&p->_freelists[k][i]);
    }
  }
#    ifdef GC_GCJ_SUPPORT
  for (i = 1; i < GC_TINY_FREELISTS; ++i) {
    GC_check_fl_marks(&p->gcj_freelists[i]);
  }
#    endif
}
#  endif

#endif /* THREAD_LOCAL_ALLOC */
